package ctagsinterface.index;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import javax.swing.JOptionPane;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.gjt.sp.jedit.GUIUtilities;
import org.gjt.sp.jedit.jEdit;
import org.gjt.sp.util.Log;
import ctagsinterface.main.Logger;
import ctagsinterface.main.Tag;
/*
* TagIndex manages a Lucene index with the tag information.
* Usage:
* --- General ---
* - When the plugin is started, create a new TagIndex object.
* - When the plugin is stopped, call close() to release any resources.
* --- Index creation ---
* - Before indexing a set of files, call startActivity().
* - Add tags for the set of files using insertTag().
* Use getOrigin() to get an object representing the origin of the tags.
* - When done, call endActivity(). This will commit the changes to make
* them available for searching.
* --- Searching the index ---
* - Call queryTag() with the tag name and a list of tags (for output).
*/
public class TagIndex
{
// ---- Field names used in the index documents ----
// Fields whose name starts with '_' (plus "id" and "type") are registered
// with the keyword analyzer in the constructor, i.e. they are matched as a
// single exact term; all other fields go through the standard analyzer.
// Origins string of a tag (stored); used for substring-style matching.
public static final String ORIGIN_FLD = "origin";
// Same origins string, registered for exact (keyword) matching; not stored.
public static final String _ORIGIN_FLD = "_origin";
// Origin type name of an origin document.
public static final String TYPE_FLD = "type";
// Document kind: ORIGIN_DOC_TYPE or TAG_DOC_TYPE.
public static final String DOCTYPE_FLD = "doctype";
// Source file path of a tag, exact match (stored).
public static final String _PATH_FLD = "_path";
// Source file path of a tag, analyzed; written with Store.NO.
public static final String PATH_FLD = "path";
// Tag pattern as returned by Tag.getPattern().
public static final String PATTERN_FLD = "pattern";
// Tag name, exact match.
public static final String _NAME_FLD = "_name";
// Lower-cased tag name, exact match.
public static final String _NAME_LOWERCASE_FLD = "_nameLC";
// Name of the "line" tag extension; excluded from getIdenticalTags() queries.
public static final String LINE_FLD = "line";
// Values of DOCTYPE_FLD.
public static final String ORIGIN_DOC_TYPE = "origin";
public static final String TAG_DOC_TYPE = "tag";
// Identifier of an origin document.
public static final String ORIGIN_ID_FLD = "id";
// Field name used by getLangNameQuery().
public static final String LANGUAGE = "language";
// Default cap on the number of documents a query hands to its handler.
public static final int MAX_RESULTS = 1000;
// ---- Lucene state ----
private FSDirectory directory;
private IndexWriter writer;
private PerFieldAnalyzerWrapper analyzer;
private StandardAnalyzer standardAnalyzer;
private KeywordAnalyzer keywordAnalyzer;
// Fields that documentToTag() does not copy into a tag's extensions.
private static final String[] FIXED_FIELDS = {
_NAME_FLD, _NAME_LOWERCASE_FLD, PATTERN_FLD, PATH_FLD,
_PATH_FLD, DOCTYPE_FLD, ORIGIN_FLD, _ORIGIN_FLD
};
// Set view of FIXED_FIELDS; (re)built by the constructor.
private static Set<String> fixedFields;
// Number of startActivity() calls not yet matched by endActivity();
// the writer is committed when it drops back to zero.
private int writeCount;
/**
 * Kinds of tag origins. Each constant carries a display name that is
 * used as the value of the origin "type" field in queries.
 */
public enum OriginType
{
    PROJECT("Project"),
    DIRECTORY("Directory"),
    ARCHIVE("Archive"),
    TAGFILE("TagFile"),
    MISC("Misc");

    /** Display name of this origin type. */
    public String name;

    private OriginType(String name)
    {
        this.name = name;
    }

    /**
     * Looks up an origin type by its display name (case-sensitive).
     * @param s display name, e.g. "Project"; may be null
     * @return the matching type, or MISC when nothing matches
     */
    public static OriginType fromString(String s)
    {
        OriginType[] candidates = values();
        for (int i = 0; i < candidates.length; i++)
        {
            OriginType candidate = candidates[i];
            if (candidate.name.equals(s))
            {
                return candidate;
            }
        }
        return MISC;
    }
}
/**
 * Opens (creating it if necessary) the index directory returned by
 * getIndexPath(), sets up the per-field analyzers and creates the
 * index writer. If the index is locked, the user is asked whether the
 * lock should be removed.
 * An IOException while opening the index is only printed; the writer
 * is then left unset.
 * @throws RuntimeException if getIndexPath() finds no settings directory
 */
public TagIndex() throws RuntimeException
{
    final File indexDir = new File(getIndexPath());
    indexDir.mkdirs();

    // Standard analyzer is given an empty set (presumably: no stop words).
    standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30,
        new HashSet<String>());
    keywordAnalyzer = new KeywordAnalyzer();
    analyzer = new PerFieldAnalyzerWrapper(standardAnalyzer);
    // Fields matched as a single exact term: the first four belong to
    // tag documents, the last two to origin documents.
    final String[] exactMatchFields = {
        _NAME_FLD, _NAME_LOWERCASE_FLD, _PATH_FLD, _ORIGIN_FLD,
        ORIGIN_ID_FLD, TYPE_FLD
    };
    for (String field : exactMatchFields)
    {
        analyzer.addAnalyzer(field, keywordAnalyzer);
    }

    Set<String> fixed = new HashSet<String>();
    for (String field : FIXED_FIELDS)
    {
        fixed.add(field);
    }
    fixedFields = fixed;
    writeCount = 0;

    try
    {
        directory = FSDirectory.open(indexDir);
        if (IndexWriter.isLocked(directory))
        {
            Log.log(Log.WARNING, this, "The lucene index at " + indexDir.getAbsolutePath() + " is locked");
            int answer = GUIUtilities.confirm(jEdit.getActiveView(),
                "lucene.index.locked", new Object[] { indexDir },
                JOptionPane.YES_NO_OPTION, JOptionPane.ERROR_MESSAGE);
            if (answer == JOptionPane.YES_OPTION)
            {
                IndexWriter.unlock(directory);
            }
        }
        writer = new IndexWriter(directory, analyzer,
            IndexWriter.MaxFieldLength.UNLIMITED);
    }
    catch (IOException ioe)
    {
        ioe.printStackTrace();
    }
}
/**
 * Builds the path of the index directory:
 * &lt;settings dir&gt;/CtagsInterface/index.
 * @return the index directory path
 * @throws RuntimeException if jEdit has no settings directory
 */
private static String getIndexPath() throws RuntimeException
{
    final String settings = jEdit.getSettingsDirectory();
    final boolean hasSettings = (settings != null) && (! settings.isEmpty());
    if (! hasSettings)
    {
        throw new RuntimeException("CtagsInterface plugin cannot work without a settings directory.");
    }
    StringBuilder indexPath = new StringBuilder(settings);
    indexPath.append(File.separator).append("CtagsInterface");
    indexPath.append(File.separator).append("index");
    return indexPath.toString();
}
/**
 * Marks the beginning of an indexing activity by incrementing the
 * count of open activities. Must be paired with endActivity().
 */
public synchronized void startActivity()
{
    writeCount++;
}
/**
 * Marks the end of an indexing activity. When the count of open
 * activities drops to zero the writer is committed, which makes the
 * changes visible to searches. A commit failure is only printed.
 */
public synchronized void endActivity()
{
    writeCount--;
    if (writeCount != 0)
    {
        return;
    }
    try
    {
        writer.commit();
    }
    catch (IOException ioe)
    {
        ioe.printStackTrace();
    }
}
/**
 * Closes the index writer. Any exception raised while closing is
 * printed and otherwise ignored.
 */
public void close()
{
    try
    {
        writer.close();
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
    }
}
/**
 * Collects the ids of all origin documents of the given type
 * (at most MAX_RESULTS).
 * @param type origin type to look for
 * @param origins output list; the id of every match is appended
 */
public void getOrigins(OriginType type, final List<String> origins)
{
    DocHandler idCollector = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            origins.add(doc.get(ORIGIN_ID_FLD));
        }
    };
    StringBuilder query = new StringBuilder();
    query.append(DOCTYPE_FLD).append(":").append(ORIGIN_DOC_TYPE);
    query.append(" AND ");
    query.append(TYPE_FLD).append(":").append(type.name);
    runQuery(query.toString(), MAX_RESULTS, idCollector);
}
/**
 * Convenience variant of getOrigins(OriginType, List) that returns the
 * origin ids in a new vector.
 * @param type origin type to look for
 * @return ids of the origins of that type; empty if there are none
 */
public Vector<String> getOrigins(OriginType type)
{
    final Vector<String> ids = new Vector<String>();
    getOrigins(type, ids);
    return ids;
}
/**
 * Returns the origins string stored with the first tag found for the
 * given source file.
 * @param file source file path (escaped before it is put in the query)
 * @return the origins string of one matching tag, or "" if no tag of
 *         that file is indexed
 */
public String getOriginsOfFile(String file)
{
    final StringBuilder originsOfFile = new StringBuilder();
    DocHandler firstHit = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            originsOfFile.append(doc.get(ORIGIN_FLD));
        }
    };
    String tagClause = DOCTYPE_FLD + ":" + TAG_DOC_TYPE;
    String pathClause = _PATH_FLD + ":" + escape(file);
    runQuery(tagClause + " AND " + pathClause, 1, firstHit);
    return originsOfFile.toString();
}
/**
 * Collects the distinct source file paths of the tags whose origins
 * field matches the given origin type. Only the first MAX_RESULTS
 * matching tags are examined.
 * @param type origin type to look for
 * @param filePaths output list; paths not already in it are appended
 */
public void getFilesOfOrigin(OriginType type, final List<String> filePaths)
{
    DocHandler pathCollector = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            String path = doc.get(_PATH_FLD);
            boolean alreadyListed = filePaths.contains(path);
            if (! alreadyListed)
            {
                filePaths.add(path);
            }
        }
    };
    String tagClause = DOCTYPE_FLD + ":" + TAG_DOC_TYPE;
    String originClause = ORIGIN_FLD + ":" + type.name;
    runQuery(tagClause + " AND " + originClause, MAX_RESULTS, pathCollector);
}
/**
 * Convenience variant of getFilesOfOrigin(OriginType, List) that
 * returns the file paths in a new vector.
 * @param type origin type to look for
 * @return distinct source file paths; empty if there are none
 */
public Vector<String> getFilesOfOrigin(OriginType type)
{
    final Vector<String> paths = new Vector<String>();
    getFilesOfOrigin(type, paths);
    return paths;
}
/**
 * Appends an origin to an origins string unless it is already listed.
 * The origin counts as listed when the origins string ends with it, or
 * contains it immediately followed by the origin separator.
 * @param origins concatenated origins string
 * @param origin string form of the origin to add
 * @return origins unchanged if origin is already listed, otherwise
 *         origins with origin appended
 */
public String appendOrigin(String origins, String origin)
{
    // An origin longer than the whole string can satisfy neither test,
    // so no separate length check is needed.
    boolean isLast = origins.endsWith(origin);
    boolean isFollowedByAnother = origins.contains(origin + Origin.SEP);
    if (isLast || isFollowedByAnother)
    {
        return origins;
    }
    return origins + origin;
}
/**
 * Deletes every document whose exact path field equals the given
 * source file, regardless of origin.
 * @param filePath path of the source file
 */
public void deleteTagsFromSourceFile(String filePath)
{
    final String escapedPath = escape(filePath);
    deleteQuery(_PATH_FLD + ":" + escapedPath);
}
/**
 * Deletes the tags of a source file for the origin MISC:temp. The
 * origin document is created first if it does not exist yet.
 * If a tag belongs to multiple origins, only MISC:temp is removed
 * from it.
 * @param logger passed through to the three-argument overload
 * @param filePath path of the source file
 */
public void deleteTagsFromSourceFileOfOrigin(Logger logger, String filePath)
{
    deleteTagsFromSourceFileOfOrigin(logger, filePath,
        getOrigin(OriginType.MISC, "temp", true));
}
/**
 * Deletes the tags that belong only to the specified origin; tags that
 * belong to several origins merely lose this one. Delegates to
 * deleteTagsOfOriginAndFilePath(), which takes the file path into
 * account only when the origin is MISC:temp.
 * @param logger passed through to deleteTagsOfOriginAndFilePath()
 * @param filePath path of the source file
 * @param origin origin to remove
 */
public void deleteTagsFromSourceFileOfOrigin(Logger logger, String filePath, Origin origin)
{
    // Note the swapped argument order of the delegate.
    deleteTagsOfOriginAndFilePath(logger, origin, filePath);
}
/**
 * Deletes the documents matching the given tag's name, file and
 * pattern.
 * @param tag tag to remove from the index
 */
public void deleteTag(Tag tag)
{
    StringBuilder query = new StringBuilder();
    query.append(_NAME_FLD).append(":").append(escape(tag.getName()));
    query.append(" AND ");
    query.append(_PATH_FLD).append(":").append(escape(tag.getFile()));
    query.append(" AND ");
    query.append(PATTERN_FLD).append(":").append(escape(tag.getPattern()));
    deleteQuery(query.toString());
}
/**
 * Deletes all documents matching the given query. The deletion is not
 * committed here. An IOException is only printed.
 * @param q query selecting the documents; null is ignored
 */
public void deleteQuery(Query q)
{
    if (q == null)
    {
        return;
    }
    try
    {
        writer.deleteDocuments(q);
    }
    catch (IOException ioe)
    {
        ioe.printStackTrace();
    }
}
/**
 * Parses the given query string and deletes all matching documents.
 * Nothing is deleted if the string cannot be parsed.
 * @param s query string in Lucene query-parser syntax
 */
public void deleteQuery(String s)
{
    // The Query overload ignores the null that getQuery() returns for
    // an unparsable string.
    Query parsed = getQuery(s);
    deleteQuery(parsed);
}
/**
 * Finds the indexed tags that have the same name, file and extension
 * values as the given tag. The "line" extension is left out of the
 * comparison.
 * @param tag tag to compare against
 * @param tags output list for the matches (at most MAX_RESULTS)
 */
public void getIdenticalTags(Tag tag, List<Tag> tags)
{
    StringBuilder query = new StringBuilder();
    query.append(_NAME_FLD).append(":").append(escape(tag.getName()));
    query.append(" AND ").append(_PATH_FLD).append(":").append(escape(tag.getFile()));
    Set<String> extensionNames = tag.getExtensions();
    if (extensionNames != null)
    {
        for (String extName : extensionNames)
        {
            if (extName.equals(LINE_FLD))
            {
                continue;
            }
            query.append(" AND ").append(extName).append(":")
                .append(escape(tag.getExtension(extName)));
        }
    }
    queryTags(query.toString(), MAX_RESULTS, tags);
}
/**
 * Deletes all tags that belong only to the specified origin. A tag
 * that belongs to several origins is kept and merely loses this one.
 * Equivalent to deleteTagsOfOriginAndFilePath() without a file path.
 * @param logger passed through to deleteTagsOfOriginAndFilePath()
 * @param origin origin whose tags are removed
 */
public void deleteTagsOfOrigin(Logger logger, final Origin origin)
{
    final String noFilePath = null;
    deleteTagsOfOriginAndFilePath(logger, origin, noFilePath);
}
/**
 * Deletes all tags that belong only to the specified origin (and, for
 * the MISC:temp origin, to the specified source file). A tag that
 * belongs to several origins is kept; the origin is removed from its
 * origins string and the document is re-indexed. At most MAX_RESULTS
 * such shared tags are rewritten per call.
 *
 * @param logger not used by this method
 * @param origin origin to remove
 * @param filePath source file to restrict the operation to, or null;
 *        only honoured when origin is MISC:temp
 */
public void deleteTagsOfOriginAndFilePath(Logger logger, final Origin origin, String filePath)
{
    final String originStr = origin.toString();
    // File path should only be included when dealing with MISC:temp,
    // where it is used as a proxy for the origin ID. The path is escaped
    // like in every other path query; a raw path containing ':', ' ' or
    // '\' would otherwise make the query unparsable.
    String filePathStr = "";
    if (filePath != null && originStr.equals(">>MISC:temp"))
        filePathStr = " AND " + _PATH_FLD + ":" + escape(filePath);
    startActivity();
    try
    {
        // Delete the tags which belong only to the specified origin.
        // Using _ORIGIN_FLD for a precise match.
        String s = DOCTYPE_FLD + ":" + TAG_DOC_TYPE + " AND " +
            _ORIGIN_FLD + ":" + escape(originStr) + filePathStr;
        Query q = getQuery(s);
        if (q != null)
        {
            try
            {
                writer.deleteDocuments(q);
                writer.commit(); // Tags show up in next query if no commit here
            }
            catch (IOException e) { e.printStackTrace(); }
        }
        // Remove the specified origin from remaining tags.
        // Using ORIGIN_FLD for a substring match.
        s = DOCTYPE_FLD + ":" + TAG_DOC_TYPE + " AND " +
            ORIGIN_FLD + ":" + escape(originStr) + filePathStr;
        runQuery(s, MAX_RESULTS, new DocHandler()
        {
            public void handle(Document doc)
            {
                String origins = doc.get(ORIGIN_FLD);
                if (origins == null)
                    return;
                // Try the end of the string first, then the middle.
                int index = origins.lastIndexOf(originStr);
                if (index < 0)
                    return;
                String newValue;
                if (index + originStr.length() == origins.length())
                    newValue = origins.substring(0, index);
                else
                {
                    index = origins.indexOf(originStr + Origin.SEP);
                    if (index < 0)
                        return;
                    newValue = origins.substring(0, index) +
                        origins.substring(index + originStr.length());
                }
                // Create a query for deleting this document, then delete it
                // and re-add it.
                String queryStr = DOCTYPE_FLD + ":" + doc.get(DOCTYPE_FLD) + " AND " +
                    _NAME_FLD + ":" + escape(doc.get(_NAME_FLD)) + " AND " +
                    _PATH_FLD + ":" + escape(doc.get(_PATH_FLD)) + " AND " +
                    LINE_FLD + ":" + doc.get(LINE_FLD);
                Query docQuery = getQuery(queryStr);
                if (docQuery != null)
                {
                    try
                    {
                        writer.deleteDocuments(docQuery);
                    }
                    catch (IOException e)
                    {
                        e.printStackTrace();
                    }
                }
                doc.removeField(ORIGIN_FLD);
                doc.removeField(_ORIGIN_FLD);
                addTagOrigins(doc, newValue);
                // A document read back from the index only carries its
                // stored fields. PATH_FLD is written with Store.NO, so it
                // has to be rebuilt from the stored _PATH_FLD or the
                // re-added tag would lose it.
                String path = doc.get(_PATH_FLD);
                if (path != null && doc.get(PATH_FLD) == null)
                    doc.add(new Field(PATH_FLD, path, Store.NO, Index.ANALYZED));
                try { writer.addDocument(doc); }
                catch (IOException e) { e.printStackTrace(); }
            }
        });
    }
    finally
    {
        // Always balance startActivity(), even if a query or the handler
        // throws, so that later activities can still commit.
        endActivity();
    }
}
/**
 * Escapes a literal value for use in a query-parser query string.
 * The bare words AND, OR and NOT are wrapped in double quotes. In any
 * other string a backslash is put in front of each of
 * + - ! ( ) { } [ ] ^ " ~ * ? : \ and the space character, and in
 * front of an '&amp;' or '|' that is directly followed by the same
 * character.
 * @param s literal value; must not be null
 * @return the escaped value
 */
public static String escape(String s)
{
    if (s.equals("AND") || s.equals("OR") || s.equals("NOT"))
    {
        return "\"" + s + "\"";
    }
    final String alwaysEscaped = "+-!(){}[]^\"~*?:\\ ";
    final int length = s.length();
    StringBuilder escaped = new StringBuilder();
    for (int pos = 0; pos < length; pos++)
    {
        char ch = s.charAt(pos);
        boolean needsBackslash;
        if (ch == '&' || ch == '|')
        {
            // Only the doubled forms "&&" and "||" are operators.
            needsBackslash = (pos + 1 < length) && (s.charAt(pos + 1) == ch);
        }
        else
        {
            needsBackslash = alwaysEscaped.indexOf(ch) >= 0;
        }
        if (needsBackslash)
        {
            escaped.append('\\');
        }
        escaped.append(ch);
    }
    return escaped.toString();
}
/**
 * Deletes an origin and all its associated data from the index: first
 * the tags of the origin, then the origin document itself, after
 * which the index is optimized.
 * @param logger passed through to deleteTagsOfOrigin()
 * @param origin origin to delete
 */
public void deleteOrigin(Logger logger, Origin origin)
{
    startActivity();
    deleteTagsOfOrigin(logger, origin);
    StringBuilder originQuery = new StringBuilder();
    originQuery.append(DOCTYPE_FLD).append(":").append(ORIGIN_DOC_TYPE);
    originQuery.append(" AND ").append(TYPE_FLD).append(":").append(origin.type.name);
    originQuery.append(" AND ").append(ORIGIN_ID_FLD).append(":").append(escape(origin.id));
    Query parsed = getQuery(originQuery.toString());
    if (parsed != null)
    {
        try
        {
            writer.deleteDocuments(parsed);
            writer.optimize();
        }
        catch (IOException ioe)
        {
            ioe.printStackTrace();
        }
    }
    endActivity();
}
/**
 * Returns an Origin object for the given type and id, optionally
 * making sure a matching origin document exists in the index.
 * @param type origin type
 * @param id origin id
 * @param createIfNotExists if true and no origin document with this
 *        type and id is found, one is added to the index
 * @return a new Origin for type and id
 */
public Origin getOrigin(OriginType type, String id,
    boolean createIfNotExists)
{
    final Origin origin = new Origin(type, id);
    // hasOrigin() runs the same doctype/type/id lookup on the index.
    if (createIfNotExists && ! hasOrigin(origin))
    {
        // Create a document for this origin
        startActivity();
        Document originDoc = new Document();
        originDoc.add(new Field(DOCTYPE_FLD, ORIGIN_DOC_TYPE, Store.YES, Index.ANALYZED));
        originDoc.add(new Field(TYPE_FLD, type.name, Store.YES, Index.ANALYZED));
        originDoc.add(new Field(ORIGIN_ID_FLD, id, Store.YES, Index.ANALYZED));
        try
        {
            writer.addDocument(originDoc);
        }
        catch (IOException ioe)
        {
            ioe.printStackTrace();
        }
        endActivity();
    }
    return origin;
}
/**
 * Adds a tag to the index. The change becomes searchable once the
 * writer is committed (see endActivity()). Any exception raised while
 * adding the document is printed and otherwise ignored.
 * @param t tag to add
 * @param originsStr origins string to store with the tag
 */
public void insertTag(Tag t, String originsStr)
{
    final Document tagDoc = tagToDocument(t, originsStr);
    try
    {
        writer.addDocument(tagDoc);
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
    }
}
/**
 * Tells whether the index contains an origin document with the type
 * and id of the given origin.
 * @param origin origin to look for
 * @return true if a matching origin document was found
 */
public boolean hasOrigin(Origin origin)
{
    // One-element array so the anonymous handler can report back.
    final boolean[] found = { false };
    DocHandler marker = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            found[0] = true;
        }
    };
    StringBuilder query = new StringBuilder();
    query.append(DOCTYPE_FLD).append(":").append(ORIGIN_DOC_TYPE);
    query.append(" AND ").append(TYPE_FLD).append(":").append(origin.type.name);
    query.append(" AND ").append(ORIGIN_ID_FLD).append(":").append(escape(origin.id));
    runQuery(query.toString(), 1, marker);
    return found[0];
}
/**
 * Tells whether the index contains at least one document whose exact
 * path field equals the given file.
 * @param file source file path
 * @return true if a matching document was found
 */
public boolean hasSourceFile(String file)
{
    // One-element array so the anonymous handler can report back.
    final boolean[] found = { false };
    DocHandler marker = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            found[0] = true;
        }
    };
    String pathQuery = _PATH_FLD + ":" + escape(file);
    runQuery(pathQuery, 1, marker);
    return found[0];
}
/**
 * Looks up tags by exact name.
 * @param name tag name
 * @param tags output list for the matches (at most MAX_RESULTS); the
 *        call is a no-op if this is null
 */
public void queryTag(String name, final List<Tag> tags)
{
    if (tags == null)
    {
        return;
    }
    // queryTags() converts each matching document with documentToTag()
    // and appends it to the list.
    String nameQuery = _NAME_FLD + ":" + escape(name);
    queryTags(nameQuery, MAX_RESULTS, tags);
}
/**
 * Runs a query and converts every matching document to a tag.
 * @param query query string in Lucene query-parser syntax
 * @param maxResults maximum number of documents to convert
 * @param tags output list for the matches; the call is a no-op if
 *        this is null
 */
public void queryTags(String query, int maxResults, final List<Tag> tags)
{
    if (tags != null)
    {
        // The List<Tag> overload of runQuery() adds documentToTag(doc)
        // for each hit.
        runQuery(query, maxResults, tags);
    }
}
/**
 * Builds the index document for a tag: name (as is and lower-cased),
 * pattern, path (analyzed and exact), one field per tag extension,
 * the origins fields and the "tag" document type.
 * @param t tag to convert
 * @param originsStr origins string to store with the tag
 * @return the new document
 */
private Document tagToDocument(Tag t, String originsStr)
{
    Document doc = new Document();
    doc.add(new Field(_NAME_FLD, t.getName(), Store.YES, Index.ANALYZED));
    // NOTE(review): toLowerCase() uses the default locale - confirm that
    // the code querying _NAME_LOWERCASE_FLD lower-cases the same way.
    doc.add(new Field(_NAME_LOWERCASE_FLD, t.getName().toLowerCase(),
        Store.YES, Index.ANALYZED));
    doc.add(new Field(PATTERN_FLD, t.getPattern(), Store.YES, Index.ANALYZED));
    doc.add(new Field(PATH_FLD, t.getFile(), Store.NO, Index.ANALYZED));
    doc.add(new Field(_PATH_FLD, t.getFile(), Store.YES, Index.ANALYZED));
    // getIdenticalTags() treats a null extension set as "no extensions";
    // do the same here instead of failing with a NullPointerException.
    Set<String> extensions = t.getExtensions();
    if (extensions != null)
    {
        for (String ext: extensions)
        {
            String val = t.getExtension(ext);
            if (val == null)
                val = "";
            doc.add(new Field(ext, val, Store.YES, Index.ANALYZED));
        }
    }
    addTagOrigins(doc, originsStr);
    doc.add(new Field(DOCTYPE_FLD, TAG_DOC_TYPE, Store.YES, Index.ANALYZED));
    return doc;
}
/**
 * Converts an index document back to a tag. Name, path and pattern
 * come from the fixed fields; every other field of the document
 * becomes a tag extension. The origins string, if the document has
 * one, is attached to the tag under the key ORIGIN_FLD.
 * @param doc document retrieved from the index
 * @return the tag built from the document
 */
private Tag documentToTag(Document doc)
{
    Tag tag = new Tag(doc.get(_NAME_FLD), doc.get(_PATH_FLD), doc.get(PATTERN_FLD));
    Hashtable<String, String> extensions = new Hashtable<String, String>();
    for (Fieldable field: doc.getFields())
    {
        if (fixedFields.contains(field.name()))
            continue;
        String val = field.stringValue();
        if (val == null)
            val = "";
        extensions.put(field.name(), val);
    }
    tag.setExtensions(extensions);
    Hashtable<String, String> attachments = new Hashtable<String, String>();
    // Hashtable rejects null values; a document without a stored origins
    // field (e.g. an origin document matched by a free-form query) must
    // not blow up the whole result processing.
    String origins = doc.get(ORIGIN_FLD);
    if (origins != null)
        attachments.put(ORIGIN_FLD, origins);
    tag.setAttachments(attachments);
    return tag;
}
/**
 * Adds the two origins fields to a tag document: the stored
 * ORIGIN_FLD and the non-stored _ORIGIN_FLD, both holding the same
 * origins string.
 * @param doc document to extend
 * @param originsStr origins string
 */
private void addTagOrigins(Document doc, String originsStr)
{
    Field storedOrigins = new Field(ORIGIN_FLD, originsStr, Store.YES, Index.ANALYZED);
    doc.add(storedOrigins);
    Field exactOrigins = new Field(_ORIGIN_FLD, originsStr, Store.NO, Index.ANALYZED);
    doc.add(exactOrigins);
}
/* Various queries */
/**
 * Runs a query with the default result limit (MAX_RESULTS) and adds
 * the matching tags to the given list.
 * @param query query string in Lucene query-parser syntax
 * @param tags output list for the matches
 */
public void runQuery(String query, final List<Tag> tags)
{
    final int limit = MAX_RESULTS;
    runQuery(query, limit, tags);
}
/**
 * Runs a query and adds each matching document, converted to a tag,
 * to the given list.
 * @param query query string in Lucene query-parser syntax
 * @param maxResults maximum number of documents to convert
 * @param tags output list for the matches; not checked for null here
 */
public void runQuery(String query, int maxResults, final List<Tag> tags)
{
    DocHandler tagCollector = new DocHandler()
    {
        @Override
        public void handle(Document doc)
        {
            Tag converted = documentToTag(doc);
            tags.add(converted);
        }
    };
    runQuery(query, maxResults, tagCollector);
}
/**
 * Runs a query restricted to tags that belong to at least one of the
 * given origins. For each origin two wildcard clauses on _ORIGIN_FLD
 * are generated: one for the origin at the end of the origins string
 * and one for the origin followed by the separator.
 * @param query additional query, AND-ed with the origin restriction;
 *        may be null or empty for "no further restriction"
 * @param origins origins to search in; nothing is done if this is
 *        null or empty
 * @param maxResults maximum number of results
 * @param tags output list for the matches
 */
public void runQueryInOrigins(String query, List<Origin> origins,
    int maxResults, final List<Tag> tags)
{
    if (origins == null || origins.isEmpty())
    {
        return;
    }
    StringBuilder restricted = new StringBuilder("(");
    String separator = "";
    for (Origin origin : origins)
    {
        String escaped = escape(origin.toString());
        restricted.append(separator);
        restricted.append(_ORIGIN_FLD).append(":*").append(escaped);
        restricted.append(" OR ");
        restricted.append(_ORIGIN_FLD).append(":*").append(escaped)
            .append(Origin.SEP).append("*");
        separator = " OR ";
    }
    restricted.append(")");
    boolean hasExtraQuery = (query != null) && (! query.isEmpty());
    if (hasExtraQuery)
    {
        restricted.append(" AND (").append(query).append(")");
    }
    runQuery(restricted.toString(), maxResults, tags);
}
/**
 * Looks up a tag name within the given origins, using the default
 * result limit (MAX_RESULTS).
 * @param tag tag name to look for
 * @param origins origins to search in
 * @param tags list of tags to be filled in by this query
 */
public void queryTagInOrigins(String tag, List<Origin> origins,
    final List<Tag> tags)
{
    final int limit = MAX_RESULTS;
    queryTagInOrigins(tag, origins, limit, tags);
}
/**
 * Looks up a tag name (exact match) within the given origins.
 * @param tag tag name to look for
 * @param origins origins to search in
 * @param maxResults maximum number of results
 * @param tags list of tags to be filled in by this query
 */
public void queryTagInOrigins(String tag, List<Origin> origins,
    int maxResults, final List<Tag> tags)
{
    final String nameQuery = _NAME_FLD + ":" + escape(tag);
    runQueryInOrigins(nameQuery, origins, maxResults, tags);
}
/**
 * Builds a query string matching tag documents with the given exact
 * name.
 * @param name tag name (escaped by this method)
 * @return the query string
 */
public String getTagNameQuery(String name)
{
    StringBuilder query = new StringBuilder();
    query.append(DOCTYPE_FLD).append(":").append(TAG_DOC_TYPE);
    query.append(" AND ");
    query.append(_NAME_FLD).append(":").append(escape(name));
    return query.toString();
}
/**
 * Builds a query string matching documents whose "language" field
 * has the given value.
 * @param lang language name (escaped by this method)
 * @return the query string
 */
public String getLangNameQuery(String lang)
{
    final String escapedLang = escape(lang);
    return LANGUAGE + ":" + escapedLang;
}
/**
 * Parses a query string with this index's analyzer. _NAME_FLD is the
 * default field; leading wildcards are allowed and expanded terms are
 * not lower-cased.
 * @param query query string in Lucene query-parser syntax
 * @return the parsed query, or null if parsing failed (the failure is
 *         printed and logged)
 */
private Query getQuery(String query)
{
    Log.log(Log.MESSAGE, TagIndex.class, "Parsing query: " + query);
    QueryParser parser = new QueryParser(Version.LUCENE_30, _NAME_FLD, analyzer);
    parser.setAllowLeadingWildcard(true);
    parser.setLowercaseExpandedTerms(false);
    Query parsed = null;
    try
    {
        parsed = parser.parse(query);
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
        Log.log(Log.WARNING, TagIndex.class, "Parsing failed for query: " + query);
    }
    return parsed;
}
/**
 * Parses and runs a query on a fresh read-only searcher and passes
 * each of the top matching documents to the handler. Only changes
 * that have been committed by the writer are visible.
 * Nothing happens if the query cannot be parsed or the index does not
 * exist yet; other I/O errors are printed.
 * @param query query string in Lucene query-parser syntax
 * @param maxResults maximum number of documents passed to the handler
 * @param handler callback invoked once per matching document
 */
public void runQuery(String query, int maxResults, DocHandler handler)
{
    Query q = getQuery(query);
    if (q == null)
        return;
    IndexSearcher searcher = null;
    try
    {
        Log.log(Log.MESSAGE, TagIndex.class, "Searching query '" + q.toString() + "' started.");
        searcher = new IndexSearcher(directory, true);
        TopDocs topDocs = searcher.search(q, maxResults);
        Log.log(Log.MESSAGE, TagIndex.class, "Searching query: '" + q.toString() + "' ended.");
        Log.log(Log.MESSAGE, TagIndex.class, "Processing of " + topDocs.scoreDocs.length + " query results started.");
        for (ScoreDoc scoreDoc: topDocs.scoreDocs)
        {
            Document doc = searcher.doc(scoreDoc.doc);
            handler.handle(doc);
        }
        Log.log(Log.MESSAGE, TagIndex.class, "Processing query results ended.");
    }
    catch (IndexNotFoundException e) { /* ignore: nothing indexed yet */ }
    catch (IOException e) { e.printStackTrace(); }
    finally
    {
        // Close in finally so the searcher (and its index reader) is
        // released even when the search or the handler throws.
        if (searcher != null)
        {
            Log.log(Log.MESSAGE, TagIndex.class, "Closing searcher started.");
            try { searcher.close(); }
            catch (IOException e) { e.printStackTrace(); }
            Log.log(Log.MESSAGE, TagIndex.class, "Closing searcher ended.");
        }
    }
}
/**
 * A source of tags, identified by an origin type and an id. Its string
 * form is SEP + type + ":" + id, where the type is written as the enum
 * constant (e.g. ">>MISC:temp"). Several origins are concatenated into
 * one "origins string" without any extra delimiter.
 */
public static class Origin
{
    /** Prefix that starts every origin inside an origins string. */
    private static final String SEP = ">>";
    public OriginType type;
    public String id;
    /** String form of this origin, computed once by the constructor. */
    public String s;

    public Origin(OriginType type, String id)
    {
        this.type = type;
        this.id = id;
        s = SEP + type + ":" + id;
    }

    @Override
    public String toString()
    {
        return s;
    }

    /**
     * Parses an origins string and appends an Origin for each entry.
     * @param s origins string; nothing is added if it contains no SEP
     * @param origins output list
     * @throws IllegalArgumentException if an entry names an unknown
     *         origin type
     * @throws ArrayIndexOutOfBoundsException if an entry has no ':'
     */
    public static void fromString(String s, List<Origin> origins)
    {
        int index = s.indexOf(SEP);
        if (index < 0)
            return;
        do
        {
            int nextIndex = s.indexOf(SEP, index + 1);
            String origin;
            if (nextIndex >= 0)
                origin = s.substring(index, nextIndex);
            else
                origin = s.substring(index);
            // Split "TYPE:id" at the first ':' only; the id may contain ':'.
            String [] parts = origin.substring(SEP.length()).split(":", 2);
            origins.add(new Origin(OriginType.valueOf(parts[0]), parts[1]));
            index = nextIndex;
        }
        while (index >= 0);
    }

    /**
     * Type-specific comparison, kept for existing callers.
     * @return true if origin is non-null and has the same type and id
     */
    public boolean equals(Origin origin)
    {
        if (origin == null)
            return false;
        if (type != origin.type)
            return false;
        return (id == null) ? (origin.id == null) : id.equals(origin.id);
    }

    /**
     * Overrides Object.equals so that collections (List.contains,
     * HashSet, ...) compare origins by type and id instead of identity.
     */
    @Override
    public boolean equals(Object obj)
    {
        return (obj instanceof Origin) && equals((Origin) obj);
    }

    /** Consistent with equals: derived from type and id. */
    @Override
    public int hashCode()
    {
        int typeHash = (type == null) ? 0 : type.hashCode();
        int idHash = (id == null) ? 0 : id.hashCode();
        return 31 * typeHash + idHash;
    }
}
/**
 * Callback used by runQuery(String, int, DocHandler) to process the
 * documents matching a query.
 */
public interface DocHandler
{
/**
 * Called once for each matching document.
 * @param doc a document retrieved from the searcher
 */
void handle(Document doc);
}
}